R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#1. Summary Analysis describing the number of 
#a. Columns
#b. rows 
#c. names of different columns
#d. The class types of the columns

# Load the HR attrition data set from disk.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 read.csv() keeps
# text columns as character by default, while the factor summaries and the
# plot(<factor>, ...) calls later in this document need factor columns.
Employeedetails <- read.csv(
  "D:/EDA/Problem4_EmployeeAtrition/DataSet-HR-Employee-Attritions.csv",
  stringsAsFactors = TRUE
)

# Number of columns and rows in the data set
ncols <- ncol(Employeedetails)
cat("No of Columns in dataset=", ncols, "\n")
## No of Columns in dataset= 35
nrows <- nrow(Employeedetails)
cat("No of rows in dataset=", nrows, "\n")
## No of rows in dataset= 1470
# Column names and class types of the columns in the data set
str(Employeedetails)
## 'data.frame':    1470 obs. of  35 variables:
##  $ Age                     : int  41 49 37 33 27 32 59 30 38 36 ...
##  $ Attrition               : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ BusinessTravel          : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 2 3 2 3 2 3 3 2 3 ...
##  $ DailyRate               : int  1102 279 1373 1392 591 1005 1324 1358 216 1299 ...
##  $ Department              : Factor w/ 3 levels "Human Resources",..: 3 2 2 2 2 2 2 2 2 2 ...
##  $ DistanceFromHome        : int  1 8 2 3 2 2 3 24 23 27 ...
##  $ Education               : int  2 1 2 4 1 2 3 1 3 3 ...
##  $ EducationField          : Factor w/ 6 levels "Human Resources",..: 2 2 5 2 4 2 4 2 2 4 ...
##  $ EmployeeCount           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ EmployeeNumber          : int  1 2 4 5 7 8 10 11 12 13 ...
##  $ EnvironmentSatisfaction : int  2 3 4 4 1 4 3 4 4 3 ...
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 2 1 2 2 2 ...
##  $ HourlyRate              : int  94 61 92 56 40 79 81 67 44 94 ...
##  $ JobInvolvement          : int  3 2 2 3 3 3 4 3 2 3 ...
##  $ JobLevel                : int  2 2 1 1 1 1 1 1 3 2 ...
##  $ JobRole                 : Factor w/ 9 levels "Healthcare Representative",..: 8 7 3 7 3 3 3 3 5 1 ...
##  $ JobSatisfaction         : int  4 2 3 3 2 4 1 3 3 3 ...
##  $ MaritalStatus           : Factor w/ 3 levels "Divorced","Married",..: 3 2 3 2 2 3 2 1 3 2 ...
##  $ MonthlyIncome           : int  5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
##  $ MonthlyRate             : int  19479 24907 2396 23159 16632 11864 9964 13335 8787 16577 ...
##  $ NumCompaniesWorked      : int  8 1 6 1 9 0 4 1 0 6 ...
##  $ Over18                  : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ OverTime                : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 2 1 1 1 ...
##  $ PercentSalaryHike       : int  11 23 15 11 12 13 20 22 21 13 ...
##  $ PerformanceRating       : int  3 4 3 3 3 3 4 4 4 3 ...
##  $ RelationshipSatisfaction: int  1 4 2 3 4 3 1 2 2 2 ...
##  $ StandardHours           : int  80 80 80 80 80 80 80 80 80 80 ...
##  $ StockOptionLevel        : int  0 1 0 0 1 0 3 1 0 2 ...
##  $ TotalWorkingYears       : int  8 10 7 8 6 8 12 1 10 17 ...
##  $ TrainingTimesLastYear   : int  0 3 3 3 3 2 3 2 2 3 ...
##  $ WorkLifeBalance         : int  1 3 3 3 3 2 2 3 3 2 ...
##  $ YearsAtCompany          : int  6 10 0 8 2 7 1 1 9 7 ...
##  $ YearsInCurrentRole      : int  4 7 0 7 2 7 0 0 7 7 ...
##  $ YearsSinceLastPromotion : int  0 1 0 3 2 3 0 0 1 7 ...
##  $ YearsWithCurrManager    : int  5 7 0 0 2 6 0 0 8 7 ...
# Per-column overview: min/quartiles/mean/max for numeric columns and
# level counts for factor columns.
summary(Employeedetails)
##       Age        Attrition            BusinessTravel   DailyRate     
##  Min.   :18.00   No :1233   Non-Travel       : 150   Min.   : 102.0  
##  1st Qu.:30.00   Yes: 237   Travel_Frequently: 277   1st Qu.: 465.0  
##  Median :36.00              Travel_Rarely    :1043   Median : 802.0  
##  Mean   :36.92                                       Mean   : 802.5  
##  3rd Qu.:43.00                                       3rd Qu.:1157.0  
##  Max.   :60.00                                       Max.   :1499.0  
##                                                                      
##                   Department  DistanceFromHome   Education    
##  Human Resources       : 63   Min.   : 1.000   Min.   :1.000  
##  Research & Development:961   1st Qu.: 2.000   1st Qu.:2.000  
##  Sales                 :446   Median : 7.000   Median :3.000  
##                               Mean   : 9.193   Mean   :2.913  
##                               3rd Qu.:14.000   3rd Qu.:4.000  
##                               Max.   :29.000   Max.   :5.000  
##                                                               
##           EducationField EmployeeCount EmployeeNumber  
##  Human Resources : 27    Min.   :1     Min.   :   1.0  
##  Life Sciences   :606    1st Qu.:1     1st Qu.: 491.2  
##  Marketing       :159    Median :1     Median :1020.5  
##  Medical         :464    Mean   :1     Mean   :1024.9  
##  Other           : 82    3rd Qu.:1     3rd Qu.:1555.8  
##  Technical Degree:132    Max.   :1     Max.   :2068.0  
##                                                        
##  EnvironmentSatisfaction    Gender      HourlyRate     JobInvolvement
##  Min.   :1.000           Female:588   Min.   : 30.00   Min.   :1.00  
##  1st Qu.:2.000           Male  :882   1st Qu.: 48.00   1st Qu.:2.00  
##  Median :3.000                        Median : 66.00   Median :3.00  
##  Mean   :2.722                        Mean   : 65.89   Mean   :2.73  
##  3rd Qu.:4.000                        3rd Qu.: 83.75   3rd Qu.:3.00  
##  Max.   :4.000                        Max.   :100.00   Max.   :4.00  
##                                                                      
##     JobLevel                          JobRole    JobSatisfaction
##  Min.   :1.000   Sales Executive          :326   Min.   :1.000  
##  1st Qu.:1.000   Research Scientist       :292   1st Qu.:2.000  
##  Median :2.000   Laboratory Technician    :259   Median :3.000  
##  Mean   :2.064   Manufacturing Director   :145   Mean   :2.729  
##  3rd Qu.:3.000   Healthcare Representative:131   3rd Qu.:4.000  
##  Max.   :5.000   Manager                  :102   Max.   :4.000  
##                  (Other)                  :215                  
##   MaritalStatus MonthlyIncome    MonthlyRate    NumCompaniesWorked
##  Divorced:327   Min.   : 1009   Min.   : 2094   Min.   :0.000     
##  Married :673   1st Qu.: 2911   1st Qu.: 8047   1st Qu.:1.000     
##  Single  :470   Median : 4919   Median :14236   Median :2.000     
##                 Mean   : 6503   Mean   :14313   Mean   :2.693     
##                 3rd Qu.: 8379   3rd Qu.:20462   3rd Qu.:4.000     
##                 Max.   :19999   Max.   :26999   Max.   :9.000     
##                                                                   
##  Over18   OverTime   PercentSalaryHike PerformanceRating
##  Y:1470   No :1054   Min.   :11.00     Min.   :3.000    
##           Yes: 416   1st Qu.:12.00     1st Qu.:3.000    
##                      Median :14.00     Median :3.000    
##                      Mean   :15.21     Mean   :3.154    
##                      3rd Qu.:18.00     3rd Qu.:3.000    
##                      Max.   :25.00     Max.   :4.000    
##                                                         
##  RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
##  Min.   :1.000            Min.   :80    Min.   :0.0000   Min.   : 0.00    
##  1st Qu.:2.000            1st Qu.:80    1st Qu.:0.0000   1st Qu.: 6.00    
##  Median :3.000            Median :80    Median :1.0000   Median :10.00    
##  Mean   :2.712            Mean   :80    Mean   :0.7939   Mean   :11.28    
##  3rd Qu.:4.000            3rd Qu.:80    3rd Qu.:1.0000   3rd Qu.:15.00    
##  Max.   :4.000            Max.   :80    Max.   :3.0000   Max.   :40.00    
##                                                                           
##  TrainingTimesLastYear WorkLifeBalance YearsAtCompany   YearsInCurrentRole
##  Min.   :0.000         Min.   :1.000   Min.   : 0.000   Min.   : 0.000    
##  1st Qu.:2.000         1st Qu.:2.000   1st Qu.: 3.000   1st Qu.: 2.000    
##  Median :3.000         Median :3.000   Median : 5.000   Median : 3.000    
##  Mean   :2.799         Mean   :2.761   Mean   : 7.008   Mean   : 4.229    
##  3rd Qu.:3.000         3rd Qu.:3.000   3rd Qu.: 9.000   3rd Qu.: 7.000    
##  Max.   :6.000         Max.   :4.000   Max.   :40.000   Max.   :18.000    
##                                                                           
##  YearsSinceLastPromotion YearsWithCurrManager
##  Min.   : 0.000          Min.   : 0.000      
##  1st Qu.: 0.000          1st Qu.: 2.000      
##  Median : 1.000          Median : 3.000      
##  Mean   : 2.188          Mean   : 4.123      
##  3rd Qu.: 3.000          3rd Qu.: 7.000      
##  Max.   :15.000          Max.   :17.000      
## 
#2. Check the data quality of the data set. If there are null (missing) values in it, use data imputation to fill them in. Justify the methodology you follow for data imputation.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
# Fill missing values one column at a time.
#  * numeric columns: replace NA with the column median (robust to outliers);
#  * every other column (e.g. factors): Hmisc::impute(x, "random"), which
#    fills NA with values drawn from the observed entries of that column.
# The type test is done on the column itself; testing class() of the whole
# data frame against "int" never matches, so the median branch would never run.
for (i in seq_len(ncol(Employeedetails))) {
  column <- Employeedetails[[i]]
  if (is.numeric(column)) {
    # Only touch the column when something is missing, so an integer column
    # without NA is not silently coerced to double by the assignment.
    if (anyNA(column)) {
      column[is.na(column)] <- median(column, na.rm = TRUE)
      Employeedetails[[i]] <- column
    }
  } else {
    Employeedetails[[i]] <- impute(column, "random")
  }
}
#3.Do Univariate Analysis of all the variables present
# Gender: head-count per level, drawn as a bar chart with the count printed
# above each bar and the level names written under the bars.

summary(Employeedetails$Gender)
## Female   Male 
##    588    882
Female <- Employeedetails[which(Employeedetails$Gender == "Female"), , drop = FALSE]
male <- Employeedetails[which(Employeedetails$Gender == "Male"), , drop = FALSE]
Femalel <- nrow(Female)
malel <- nrow(male)
lbls <- c("Female", "male")
slices <- c(Femalel, malel)
# barplot() returns the bar mid-points; they position the text and the axis.
xx <- barplot(
  height = slices,
  main = "Gender  of Employees ",
  ylab = "No of Employees ",
  col = "blue",
  border = "black"
)
text(xx, slices, labels = slices, pos = 3, cex = 0.8, col = "red")
axis(side = 1, at = xx, labels = lbls, tick = FALSE, las = 3, line = -0.5, cex.axis = 0.9)

# Bar graph of MaritalStatus: head-count per level with the count printed
# above each bar.
summary(Employeedetails$MaritalStatus)
## Divorced  Married   Single 
##      327      673      470
Married <- subset(Employeedetails, Employeedetails$MaritalStatus == "Married")
Single <- subset(Employeedetails, Employeedetails$MaritalStatus == "Single")
Divorced <- subset(Employeedetails, Employeedetails$MaritalStatus == "Divorced")
Marriedl <- nrow(Married)
Singlel <- nrow(Single)
Divorcedl <- nrow(Divorced)
slices <- c(Marriedl, Singlel, Divorcedl)
# Labels follow the order of `slices`; the third label previously read
# "Divorcedl " (a variable name) and is corrected to the level name.
lbls <- c("Married", "Single", "Divorced")
xx <- barplot(slices, ylab = "No of Employees ", col = "blue", border = "black", main = "MaritalStatus  of Employees ")
text(x = xx, y = slices, labels = slices, pos = 3, cex = 0.8, col = "red")
axis(1, at = xx, labels = lbls, tick = FALSE, las = 3, line = -0.5, cex.axis = 0.9)

# Attrition: number of employees who left ("Yes") versus stayed ("No"),
# drawn as a bar chart with the count printed above each bar.
Attritionyes <- Employeedetails[which(Employeedetails$Attrition == "Yes"), , drop = FALSE]
AttritionNO <- Employeedetails[which(Employeedetails$Attrition == "No"), , drop = FALSE]
Attritionyesl <- nrow(Attritionyes)
AttritionNOl <- nrow(AttritionNO)
lbls <- c("Attritionyes", "AttritionNO")
slices <- c(Attritionyesl, AttritionNOl)
# barplot() returns the bar mid-points; they position the text and the axis.
xx <- barplot(
  height = slices,
  main = "Attrition of Employees ",
  ylab = "No of Employees ",
  col = "blue",
  border = "black"
)
text(xx, slices, labels = slices, pos = 3, cex = 0.8, col = "red")
axis(side = 1, at = xx, labels = lbls, tick = FALSE, las = 3, line = -0.5, cex.axis = 0.9)

# OverTime: head-count per level, drawn as a bar chart with the count printed
# above each bar.
summary(Employeedetails$OverTime)
##   No  Yes 
## 1054  416
Yes <- Employeedetails[which(Employeedetails$OverTime == "Yes"), , drop = FALSE]
No <- Employeedetails[which(Employeedetails$OverTime == "No"), , drop = FALSE]
Yesl <- nrow(Yes)
Nol <- nrow(No)
lbls <- c("Yes", "No")
slices <- c(Yesl, Nol)
# barplot() returns the bar mid-points; they position the text and the axis.
xx <- barplot(
  height = slices,
  main = "OverTime  of Employees ",
  ylab = "No of employee ",
  col = "blue",
  border = "black"
)
text(xx, slices, labels = slices, pos = 3, cex = 0.8, col = "red")
axis(side = 1, at = xx, labels = lbls, tick = FALSE, las = 1, line = -0.1, cex.axis = 0.9)

# Histograms of the numeric variables, one plot per row of `hist_specs`.
# Each row gives: column name, plot title, bar border colour, bar fill colour.
# Changes relative to the previous copy-pasted calls:
#  * the x axis is labelled with the variable name and the y axis with
#    "No of people" (the counts are on the y axis, not the x axis);
#  * the YearsAtCompany plot no longer carries the EnvironmentSatisfaction title;
#  * "empolyee" is spelled "employee".
# NOTE(review): PerformanceRating is not plotted here - confirm whether that
# omission is intentional.
hist_specs <- matrix(
  c(
    "Age", "Age of employee", "red", "blue",
    "DailyRate", "Daily rate of employee", "green", "red",
    "DistanceFromHome", "DistanceFromHome of employee", "aquamarine", "yellow",
    "Education", "Education of employee", "darkmagenta", "aquamarine",
    "EmployeeCount", "Employee Count", "darkmagenta", "green",
    "EmployeeNumber", "EmployeeNumber", "darkmagenta", "dodgerblue",
    "EnvironmentSatisfaction", "EnvironmentSatisfaction of employee", "green2", "gold",
    "HourlyRate", "HourlyRate of employee", "darkmagenta", "deeppink2",
    "JobInvolvement", "JobInvolvement of employee", "green2", "khaki1",
    "JobLevel", "JobLevel of employee", "darkmagenta", "hotpink",
    "JobSatisfaction", "JobSatisfaction of employee", "darkmagenta", "lightgoldenrod",
    "MonthlyIncome", "MonthlyIncome of employee", "darkmagenta", "orangered",
    "MonthlyRate", "MonthlyRate of employee", "darkmagenta", "palegreen",
    "NumCompaniesWorked", "NumCompaniesWorked of employee", "darkmagenta", "seagreen1",
    "PercentSalaryHike", "PercentSalaryHike of employee", "darkmagenta", "tan3",
    "RelationshipSatisfaction", "RelationshipSatisfaction of employee", "red", "blue",
    "StandardHours", "StandardHours of employee", "green", "red",
    "StockOptionLevel", "StockOptionLevel of employee", "aquamarine", "yellow",
    "TotalWorkingYears", "TotalWorkingYears of employee", "darkmagenta", "aquamarine",
    "TrainingTimesLastYear", "TrainingTimesLastYear of Employee", "darkmagenta", "green",
    "WorkLifeBalance", "WorkLifeBalance", "darkmagenta", "dodgerblue",
    "YearsAtCompany", "YearsAtCompany of employee", "green2", "gold",
    "YearsInCurrentRole", "YearsInCurrentRole of employee", "darkmagenta", "deeppink2",
    "YearsSinceLastPromotion", "YearsSinceLastPromotion of employee", "green2", "khaki1",
    "YearsWithCurrManager", "YearsWithCurrManager of employee", "darkmagenta", "hotpink"
  ),
  ncol = 4,
  byrow = TRUE,
  dimnames = list(NULL, c("column", "title", "border", "fill"))
)

for (k in seq_len(nrow(hist_specs))) {
  hist(
    Employeedetails[[hist_specs[k, "column"]]],
    main = hist_specs[k, "title"],
    xlab = hist_specs[k, "column"],
    ylab = "No of people",
    border = hist_specs[k, "border"],
    col = hist_specs[k, "fill"]
  )
}

#4. The target variable in this dataset is the attribute "Attrition". Do a bivariate analysis of this variable with all the other attributes. Also give your inferences from the graphs generated.
# Bivariate plots: Attrition (x) against every other attribute (y), one plot
# per entry of `bivariate_fill` (names = column, value = fill colour).
# The title is built from the column name, so it can no longer disagree with
# the plotted variable (the EducationField plot used to be titled
# "Impact of BusinessTravel on Attrition"). Axis labels are set explicitly.
bivariate_fill <- c(
  Age = "lightgoldenrod",
  BusinessTravel = "khaki1",
  DailyRate = "green",
  Department = "red",
  DistanceFromHome = "yellow",
  Education = "lightgoldenrod",
  EducationField = "hotpink",
  EmployeeCount = "seagreen1",
  EmployeeNumber = "hotpink",
  EnvironmentSatisfaction = "lightgoldenrod",
  Gender = "khaki1",
  HourlyRate = "dodgerblue",
  JobInvolvement = "lightgoldenrod",
  JobLevel = "dodgerblue",
  JobRole = "seagreen1",
  JobSatisfaction = "lightgoldenrod",
  MaritalStatus = "seagreen1",
  MonthlyIncome = "hotpink",
  MonthlyRate = "lightgoldenrod",
  NumCompaniesWorked = "dodgerblue",
  Over18 = "peru",
  OverTime = "lightgoldenrod",
  PercentSalaryHike = "hotpink",
  PerformanceRating = "khaki1",
  RelationshipSatisfaction = "green",
  StandardHours = "red",
  StockOptionLevel = "dodgerblue",
  TotalWorkingYears = "yellow",
  TrainingTimesLastYear = "lightgoldenrod",
  WorkLifeBalance = "hotpink",
  YearsAtCompany = "seagreen1",
  YearsInCurrentRole = "hotpink",
  YearsSinceLastPromotion = "lightgoldenrod",
  YearsWithCurrManager = "khaki1"
)

for (attribute in names(bivariate_fill)) {
  plot(
    Employeedetails$Attrition,
    Employeedetails[[attribute]],
    main = paste("Impact of", attribute, "on Attrition"),
    xlab = "Attrition",
    ylab = attribute,
    col = bivariate_fill[[attribute]]
  )
}

#5. Draw a multivariate graph for all the variables
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(cluster)
# Record which columns are categorical BEFORE everything is coerced to numeric
# codes. After the coercion every column is numeric, so a later test for class
# "factor" matches nothing and no categorical column would be removed.
categorical_cols <- vapply(
  Employeedetails,
  function(x) is.factor(x) || is.character(x),
  logical(1)
)

# Coerce every column to numeric (factors become their integer level codes;
# character columns are turned into factors first so they do not become NA).
Employeedetails[] <- lapply(
  Employeedetails,
  function(x) if (is.character(x)) as.numeric(factor(x)) else as.numeric(x)
)

# Pairwise Euclidean distances between employees on standardised columns,
# drawn as a heat map.
res.dist <- get_dist(Employeedetails, stand = TRUE, method = "euclidean")
fviz_dist(res.dist,
   gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))

#6. Apply PCA on this dataset after removing categorical variables
# Keep only the columns that were not categorical in the loaded data.
Employeedetailss <- Employeedetails[, !categorical_cols, drop = FALSE]

# Random 70/30 split of the rows (no seed is set, so the split differs per run).
dt <- sort(sample(nrow(Employeedetailss), nrow(Employeedetailss) * .7))
trainEmployeedetails <- Employeedetailss[dt, ]
testEmployeedetails <- Employeedetailss[-dt, ]

# NOTE(review): the PCA is fitted on the 30% test split, not the training
# split - confirm this is intended.
# The printed PCA output that follows in this document was produced with all
# 35 columns and has to be regenerated after this change.
EmployeedetailsPCA <- princomp(testEmployeedetails)
# List the components stored in the princomp result.
names(EmployeedetailsPCA)
## [1] "sdev"     "loadings" "center"   "scale"    "n.obs"    "scores"  
## [7] "call"
# Print the fit: the call and the standard deviation of each component.
EmployeedetailsPCA
## Call:
## princomp(x = testEmployeedetails)
## 
## Standard deviations:
##       Comp.1       Comp.2       Comp.3       Comp.4       Comp.5 
## 7.265972e+03 4.684087e+03 6.124753e+02 4.070744e+02 2.025643e+01 
##       Comp.6       Comp.7       Comp.8       Comp.9      Comp.10 
## 8.887913e+00 7.894123e+00 6.550715e+00 3.646198e+00 3.550481e+00 
##      Comp.11      Comp.12      Comp.13      Comp.14      Comp.15 
## 2.463871e+00 2.361747e+00 2.192460e+00 1.886138e+00 1.804316e+00 
##      Comp.16      Comp.17      Comp.18      Comp.19      Comp.20 
## 1.346739e+00 1.264687e+00 1.142857e+00 1.084024e+00 1.066242e+00 
##      Comp.21      Comp.22      Comp.23      Comp.24      Comp.25 
## 9.757728e-01 9.505826e-01 7.175354e-01 6.608063e-01 6.075385e-01 
##      Comp.26      Comp.27      Comp.28      Comp.29      Comp.30 
## 4.875494e-01 4.561585e-01 4.313434e-01 4.012937e-01 3.075279e-01 
##      Comp.31      Comp.32      Comp.33      Comp.34      Comp.35 
## 2.846456e-01 2.137523e-01 7.765618e-07 3.840661e-07 0.000000e+00 
## 
##  35  variables and  441 observations.
# Biplot of the first two principal components (observations and variable
# arrows in one plot). The warnings that follow report zero-length arrows
# that were skipped.
biplot(EmployeedetailsPCA,scale = 0)
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

# NOTE(review): this assigns FALSE to a variable named `message`; it looks
# like a knitr chunk option (message=FALSE) that ended up in the code body -
# confirm and move it to the chunk header if so.
message=FALSE
#7. After applying PCA, draw inferences from the Principal Components generated using appropriate plots. Describe your inferences

# ---------- Scree plot: preparation ----------

# Variance carried by each principal component (squared standard deviation).
pr_var <- EmployeedetailsPCA$sdev^2

# Inspect the structure of the fitted PCA object.

str(EmployeedetailsPCA)
## List of 7
##  $ sdev    : Named num [1:35] 7266 4684.1 612.5 407.1 20.3 ...
##   ..- attr(*, "names")= chr [1:35] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ loadings: loadings [1:35, 1:35] 1.05e-04 -1.45e-06 -2.15e-06 -2.28e-03 5.48e-07 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:35] "Age" "Attrition" "BusinessTravel" "DailyRate" ...
##   .. ..$ : chr [1:35] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ center  : Named num [1:35] 36.73 1.17 2.65 814.47 2.28 ...
##   ..- attr(*, "names")= chr [1:35] "Age" "Attrition" "BusinessTravel" "DailyRate" ...
##  $ scale   : Named num [1:35] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "names")= chr [1:35] "Age" "Attrition" "BusinessTravel" "DailyRate" ...
##  $ n.obs   : int 441
##  $ scores  : num [1:441, 1:35] 11064 -2106 -4033 2770 1177 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:441] "2" "6" "7" "10" ...
##   .. ..$ : chr [1:35] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ call    : language princomp(x = testEmployeedetails)
##  - attr(*, "class")= chr "princomp"
# Variance per component

# Variances of the ten leading components
pr_var[seq_len(10)]
##       Comp.1       Comp.2       Comp.3       Comp.4       Comp.5 
## 5.279434e+07 2.194068e+07 3.751260e+05 1.657096e+05 4.103228e+02 
##       Comp.6       Comp.7       Comp.8       Comp.9      Comp.10 
## 7.899499e+01 6.231717e+01 4.291187e+01 1.329476e+01 1.260592e+01
# PCA looks for the components that explain the most variance, so express
# each component's variance as a share of the total.
rot_var <- prop.table(pr_var)

rot_var[seq_len(10)]
##       Comp.1       Comp.2       Comp.3       Comp.4       Comp.5 
## 7.013389e-01 2.914678e-01 4.983307e-03 2.201345e-03 5.450874e-06 
##       Comp.6       Comp.7       Comp.8       Comp.9      Comp.10 
## 1.049398e-06 8.278435e-07 5.700566e-07 1.766123e-07 1.674615e-07
# Scree plot: proportion of variance per principal component, used to pick
# the components that explain the most variance.
# The x-axis label previously read "price Components".

plot(rot_var, xlab = "Principal Components", ylab = "Proportion of variance explained", type = "b", main = "Scree Plot")

# ----------- cumulative scree plot ---------------------------------------------------

plot(cumsum(rot_var), xlab = "Principal Components", ylab = "cumulative Proportion of variance explained", type = "b", main = "Cumulative Scree Plot")

# ----------- score plot --------------------------------------------------------------
# Observations projected on the first two principal components.

plot(EmployeedetailsPCA$scores[, 1], EmployeedetailsPCA$scores[, 2],
     xlab = "Comp.1", ylab = "Comp.2", main = "score plot")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.